/*----------------------------------------------------*
       Project : Covid 19
       Purpose : Facebook Mobility Analysis
       Updated : Feb 4 2020 
*-----------------------------------------------------*/

*------- Setting up -------*
* Run under Stata 15 semantics and start from an empty session.
version 15
clear all
set more off
pause on
quietly capture log close				//close any log still open; ignore the error if none is



* Project root directory (other users should change this to their own copy)
local path_LM = "/Users/louis-maeljean/Dropbox (MIT)/West Bengal Information Campaign/AER_I/for_submission"
local path = "`path_LM'"

cd "`path'"


* Everything from here to the final log close is written to this log file
log using "`path'/code/3_log_files/facebook_pop_regs.log", replace
*------- Constructing and saving dataset -------*
/*
insheet using "`path'/data/FacebookMobility/hull_bor_tiles_allsims_latest.csv", comma clear names 
destring simulation_*, replace force
tempfile pscore_sim
save `pscore_sim'

insheet using "`path'/data/FacebookMobility/mapped_tiles.csv", comma clear names
tempfile treatment
save `treatment'

insheet using "`path'/data/FacebookMobility/facebook_pop.csv", comma clear names
rename quadkey key
merge m:1 key using `treatment'
keep if _merge == 3
merge m:1 key using `pscore_sim', nogen
egen t_mean = rowmean(simulation_*)

destring treatment_intensity, replace force

sort key date_time

gen date_time2 = subinstr(date_time, "T", " ", 1)
gen date_time3 = subinstr(date_time2, "Z", "", 1)
gen double date = clock(date_time3, "YMDhms")
format date %tc

gen ratio = n_crisis/n_baseline


save "`path'/data/FacebookMobility/facebook_prepped.dta", replace
*/


*------- Loading Dataset -------*

* Load the prepared dataset written by the (commented-out) construction step
use "`path'/data/FacebookMobility/facebook_prepped.dta", clear


*------- Processing -------*

* date2 is the calendar day of the timestamp in date.
* time numbers the observations inside each (key, date2) cell in timestamp order;
* per the original author's note there are three such observations per day (TODO confirm).
generate date2 = dofc(date)
bysort key date2 (date): generate time = _n

* Drop the merge indicator and the raw timestamp variables, which are no longer needed
drop _merge date_time* date
destring t_mean, replace force

* Keep one observation per (key, date2): the within-day observations become
* columns suffixed with the value of time
reshape wide clipped_z_score-n_crisis ratio, i(key date2) j(time)

* Declare the panel: key is the panel identifier, date2 is the time variable
tsset key date2

* district_id is a numeric encoding of the string variable district;
* key_high is a double-precision copy of key (used later as the cluster variable)
encode district, generate(district_id)
generate double key_high = key

//Define timing
local start_exp = mdy(5,4,2020)
local end_exp   = mdy(5,5,2020)

//pre  = 1 on the 14 days immediately before the experiment start date
//post = 1 on the 14 days beginning on the experiment end date
generate pre  = inrange(date2, `start_exp'-14, `start_exp'-1)
generate post = inrange(date2, `end_exp', `end_exp'+13)

//Earlier window: 30 to 15 days before the experiment start date
//NOTE(review): the original author asked whether the lower bound should be -46 instead of -30
local pre_full "if inrange(date2, `start_exp'-30, `start_exp' -15)"


//Main outcome and control variables
//mean_pop / baseline_pop: row means, within each (key, day), of the n_crisis* / n_baseline* columns
egen mean_pop     = rowmean(n_crisis*)
egen baseline_pop = rowmean(n_baseline*)

//lagged_pop: one value per key = the LARGEST daily row mean of n_crisis* observed in the pre window
//NOTE(review): this is a maximum over days, not an average over the window; confirm that is intended
tempvar pre_daily
egen `pre_daily' = rowmean(n_crisis*) if pre
egen lagged_pop  = max(`pre_daily'), by(key)
drop `pre_daily'

//lagged_pop2: same construction, over the earlier window stored in pre_full
tempvar early_daily
egen `early_daily' = rowmean(n_crisis*) `pre_full'
egen lagged_pop2   = max(`early_daily'), by(key)
drop `early_daily'

//Logs of the outcome and of each control (missing wherever the level is zero or missing)
foreach v of varlist mean_pop baseline_pop lagged_pop lagged_pop2 {
	generate log_`v' = log(`v')
}

label variable log_mean_pop "Cellphone Usage"
label variable treatment_intensity "Treatment Intensity"


*------- Results  -------*

//Table
estimates clear

//All columns regress log_mean_pop on treatment_intensity, controlling for the mean
//treatment propensity (t_mean), a lagged population measure and baseline population,
//with district and day fixed effects and standard errors clustered by key_high.
//The fixed effects are named explicitly (district_id, date2): the variable date was
//dropped during processing, so absorb(... date) only worked through variable abbreviation,
//and district_id is the numeric encoding of district already used for the figure regression.
local tbl_controls "t_mean log_lagged_pop log_baseline_pop"
local tbl_opts     "cluster(key_high) absorb(district_id date2)"

//Column 1: full post period, days 1-15 counted from the experiment start date
eststo: reghdfe log_mean_pop treatment_intensity `tbl_controls' if inrange(date2, `start_exp', `start_exp'+14), `tbl_opts'

//Columns 2-4: three consecutive, non-overlapping 5-day windows that partition the post period.
//All bounds are anchored on start_exp. The previous version used end_exp (= start_exp + 1)
//for the upper bounds of columns 3 and 4, which made those windows 6 days long, made them
//overlap on day 11, and contradicted the "Day 6-10" / "Day 11-15" column labels.
eststo: reghdfe log_mean_pop treatment_intensity `tbl_controls' if inrange(date2, `start_exp',    `start_exp'+4),  `tbl_opts'
eststo: reghdfe log_mean_pop treatment_intensity `tbl_controls' if inrange(date2, `start_exp'+5,  `start_exp'+9),  `tbl_opts'
eststo: reghdfe log_mean_pop treatment_intensity `tbl_controls' if inrange(date2, `start_exp'+10, `start_exp'+14), `tbl_opts'

//Column 5: the 7 days before the start date, using the lagged control built from the
//earlier window (log_lagged_pop2) so the control period does not overlap the estimation sample
eststo: reghdfe log_mean_pop treatment_intensity t_mean log_lagged_pop2 log_baseline_pop if inrange(date2, `start_exp'-7, `start_exp'-1), `tbl_opts'


//Export to Latex
//Only the treatment_intensity row is reported: coefficient, (standard error), [p-value]

esttab using "`path'/output/Tables/TableA6_Facebook_regressions.tex", keep(treatment_intensity) ///
	cells(b(fmt(4) ) se(par(`"("' `")"') fmt(4) label("")) p(par(`"["' `"]"') fmt(4))) ///
	collabels(none) label mlabel("Post Period" "Day 1-5" "Day 6-10" "Day 11-15" "Pre-Period") replace booktabs


//Figure over time
//One treatment_intensity coefficient per day, from 7 days before to 14 days after the
//experiment start date. The controls are interacted with day as well.
//The absorbed day variable is written out as date2: the variable date was dropped during
//processing, so district_id##date only resolved through variable abbreviation.
reghdfe log_mean_pop c.treatment_intensity#i.date2 c.(t_mean log_baseline_pop log_lagged_pop2)##i.date2 ///
	if inrange(date2, `start_exp'-7, `start_exp'+14), cluster(key_high) absorb(district_id##date2)

preserve
	//Replace the data in memory with the table of estimates (var, coef, stderr are used below)
	regsave
	keep if regexm(var, "treatment_intensity")

	//Recover the day from the coefficient name: take the part before "#", then the part
	//before ".", and strip a "b" (base-level marker) if one is present
	split var, parse("#")
	split var1, parse(".")
	replace var11 = subinstr(var11, "b", "", 1)
	destring var11, replace
	generate date = var11
	format date %td

	//95% confidence interval from the normal approximation
	generate ub = coef + 1.96*stderr
	generate lb = coef - 1.96*stderr

	twoway (scatter coef date) (rcap ub lb date), ///
		yline(0, lcolor(black)) xline(`start_exp', lcolor(black)) ///
		legend(label(1 "Estimated Effect of Treatment") label(2 "95% confidence interval")) ///
		xlabel(`=`start_exp'-7'(4)`=`start_exp'+14')
	graph export "`path'/output/Figures/Figure2_facebook_coef.pdf", replace
restore




log close


**************************************
** END 
**************************************


